Before you start

Set my seed

# Any number can be chose
set.seed(567890)

Load Libraries

# Efficient package loading with pacman 
# Don't forget to install pacman and DT if you don't have it yet. :) 
pacman::p_load(tidyverse, devtools, dada2, phyloseq, patchwork, DT, 
               install = FALSE)

Goals for this file

  1. Use raw fastq and generate the quality plots to asses the quality of reads

  2. Filter and trim out bad sequences and bases from our sequencing files

  3. Write out fastq files with high quality sequences

  4. Evaluate the quality from our filter and trim.

  5. Infer errors on forward and reverse reads individually

  6. Identified ASVs on forward and reverse reads separately using the error model.

  7. Merge forward and reverse ASVs into “contigous ASVs”.

  8. Generate ASV count table. (otu_table input for phyloseq.).

Output that we need:

  1. ASV count table: otu_table

  2. Taxonomy table tax_table

  3. Sample information: sample_table track the reads lost throughout DADA2 workflow.

Load Data

#Set the raw fastq path to the raw sequencing files
#Path to the fastq files
raw_fastqs_path <- "data/01_DADA2/01_raw_gzipped_fastqs"

#What files are in this path (Intuition check)
list.files(raw_fastqs_path)
##  [1] "568_4_S245_L001_R1_001.fastq.gz" "568_4_S245_L001_R2_001.fastq.gz"
##  [3] "568_5_S246_L001_R1_001.fastq.gz" "568_5_S246_L001_R2_001.fastq.gz"
##  [5] "581_5_S247_L001_R1_001.fastq.gz" "581_5_S247_L001_R2_001.fastq.gz"
##  [7] "611_5_S248_L001_R1_001.fastq.gz" "611_5_S248_L001_R2_001.fastq.gz"
##  [9] "E03_5_S146_L001_R1_001.fastq.gz" "E03_5_S146_L001_R2_001.fastq.gz"
## [11] "E05_5_S147_L001_R1_001.fastq.gz" "E05_5_S147_L001_R2_001.fastq.gz"
## [13] "E1_4_S140_L001_R1_001.fastq.gz"  "E1_4_S140_L001_R2_001.fastq.gz" 
## [15] "E1_5_S141_L001_R1_001.fastq.gz"  "E1_5_S141_L001_R2_001.fastq.gz" 
## [17] "E2_4_S142_L001_R1_001.fastq.gz"  "E2_4_S142_L001_R2_001.fastq.gz" 
## [19] "E2_5_S143_L001_R1_001.fastq.gz"  "E2_5_S143_L001_R2_001.fastq.gz" 
## [21] "E3_4_S144_L001_R1_001.fastq.gz"  "E3_4_S144_L001_R2_001.fastq.gz" 
## [23] "E3_5_S145_L001_R1_001.fastq.gz"  "E3_5_S145_L001_R2_001.fastq.gz" 
## [25] "Neg1_S148_L001_R1_001.fastq.gz"  "Neg1_S148_L001_R2_001.fastq.gz" 
## [27] "Neg2_S249_L001_R1_001.fastq.gz"  "Neg2_S249_L001_R2_001.fastq.gz"
#How many files are there?
str(list.files(raw_fastqs_path))
##  chr [1:28] "568_4_S245_L001_R1_001.fastq.gz" ...
#Create a vector of forward reads
forward_reads <- list.files(raw_fastqs_path, pattern = "_R1_001.fastq.gz", full.names = TRUE) 
#Intuition check
head(forward_reads)
## [1] "data/01_DADA2/01_raw_gzipped_fastqs/568_4_S245_L001_R1_001.fastq.gz"
## [2] "data/01_DADA2/01_raw_gzipped_fastqs/568_5_S246_L001_R1_001.fastq.gz"
## [3] "data/01_DADA2/01_raw_gzipped_fastqs/581_5_S247_L001_R1_001.fastq.gz"
## [4] "data/01_DADA2/01_raw_gzipped_fastqs/611_5_S248_L001_R1_001.fastq.gz"
## [5] "data/01_DADA2/01_raw_gzipped_fastqs/E03_5_S146_L001_R1_001.fastq.gz"
## [6] "data/01_DADA2/01_raw_gzipped_fastqs/E05_5_S147_L001_R1_001.fastq.gz"
#Create a vector of reverse reads
reverse_reads <-list.files(raw_fastqs_path, pattern = "_R2_001.fastq.gz",
                           full.names = TRUE)
#Intuition check
head(reverse_reads)
## [1] "data/01_DADA2/01_raw_gzipped_fastqs/568_4_S245_L001_R2_001.fastq.gz"
## [2] "data/01_DADA2/01_raw_gzipped_fastqs/568_5_S246_L001_R2_001.fastq.gz"
## [3] "data/01_DADA2/01_raw_gzipped_fastqs/581_5_S247_L001_R2_001.fastq.gz"
## [4] "data/01_DADA2/01_raw_gzipped_fastqs/611_5_S248_L001_R2_001.fastq.gz"
## [5] "data/01_DADA2/01_raw_gzipped_fastqs/E03_5_S146_L001_R2_001.fastq.gz"
## [6] "data/01_DADA2/01_raw_gzipped_fastqs/E05_5_S147_L001_R2_001.fastq.gz"
#Any duplicates between forward and reverse reads
any(duplicated(c(forward_reads, reverse_reads)))
## [1] FALSE

Assess Raw Read Quality

Evaluate raw sequence quality

Let’s see the quality of the raw reads before we trim

Plot 12 random samples of plots

# Randomly select 12 samples from dataset to evaluate 
# Selecting 12 is typically better than 2 (like we did in class for efficiency)
random_samples <- sample(1:length(reverse_reads), size = 14)
random_samples
##  [1]  6  1 14 12 11  4  9 10 13  7  2  8  3  5
# Calculate and plot quality of these two samples
forward_filteredQual_plot_14 <- plotQualityProfile(forward_reads[random_samples]) + 
  labs(title = "Forward Read Raw Quality")

reverse_filteredQual_plot_14 <- plotQualityProfile(reverse_reads[random_samples]) + 
  labs(title = "Reverse Read Raw Quality")


# Plot them together with patchwork
forward_filteredQual_plot_14 + reverse_filteredQual_plot_14

Aggregated Raw Quality Plots

# Aggregate all QC plots 
plotQualityProfile(forward_reads, aggregate = TRUE) + 
  plotQualityProfile(reverse_reads, aggregate = TRUE)

Prepare a placeholder for filtered reads

# vector of our samples, extract the sample information from our file
samples <- sapply(strsplit(basename(forward_reads), "_"), function(x)paste(x[1],x[2],sep="_"))
#Intuition check
head(samples)
## [1] "568_4" "568_5" "581_5" "611_5" "E03_5" "E05_5"
#place filtered reads into filtered_fastqs_path
filtered_fastqs_path <- "data/01_DADA2/02_filtered_fastqs"
filtered_fastqs_path
## [1] "data/01_DADA2/02_filtered_fastqs"
# create 2 variables : filtered_F, filtered_R
filtered_forward_reads <- file.path(filtered_fastqs_path, paste0(samples, "_R1_filtered.fastq.gz"))

#Intuition check
head(filtered_forward_reads)
## [1] "data/01_DADA2/02_filtered_fastqs/568_4_R1_filtered.fastq.gz"
## [2] "data/01_DADA2/02_filtered_fastqs/568_5_R1_filtered.fastq.gz"
## [3] "data/01_DADA2/02_filtered_fastqs/581_5_R1_filtered.fastq.gz"
## [4] "data/01_DADA2/02_filtered_fastqs/611_5_R1_filtered.fastq.gz"
## [5] "data/01_DADA2/02_filtered_fastqs/E03_5_R1_filtered.fastq.gz"
## [6] "data/01_DADA2/02_filtered_fastqs/E05_5_R1_filtered.fastq.gz"
length(filtered_forward_reads)
## [1] 14
filtered_reverse_reads <- file.path(filtered_fastqs_path, paste0(samples, "_R2_filtered.fastq.gz"))

#Intuition check
head(filtered_reverse_reads)
## [1] "data/01_DADA2/02_filtered_fastqs/568_4_R2_filtered.fastq.gz"
## [2] "data/01_DADA2/02_filtered_fastqs/568_5_R2_filtered.fastq.gz"
## [3] "data/01_DADA2/02_filtered_fastqs/581_5_R2_filtered.fastq.gz"
## [4] "data/01_DADA2/02_filtered_fastqs/611_5_R2_filtered.fastq.gz"
## [5] "data/01_DADA2/02_filtered_fastqs/E03_5_R2_filtered.fastq.gz"
## [6] "data/01_DADA2/02_filtered_fastqs/E05_5_R2_filtered.fastq.gz"
length(filtered_reverse_reads)
## [1] 14

Filter and Trim Reads

Parameters of filter and trim DEPEND ON THE DATASET

  • maxN = number of N bases. Remove all Ns from the data.
  • maxEE = quality filtering threshold applied to expected errors. By default, all expected errors. Mar recommends using c(1,1). Here, if there is maxEE expected errors, its okay. If more, throw away sequence.
  • trimLeft = trim certain number of base pairs on start of each read
  • truncQ = truncate reads at the first instance of a quality score less than or equal to selected number. Chose 2
  • rm.phix = remove phi x
  • compress = make filtered files .gzipped
  • multithread = multithread
#Assign a vector to filtered reads
#Trim out poor bases, none in this instance.
#Write out filtered fastq files
filtered_reads <-
  filterAndTrim(fwd = forward_reads, filt = filtered_forward_reads,
              rev = reverse_reads, filt.rev = filtered_reverse_reads,
              truncLen = c(245,230), trimLeft = c(9,9),
              maxN = 0, maxEE = c(1, 1),truncQ = 2, rm.phix = TRUE,
              compress = TRUE, multithread = TRUE)

#These files are described in Kozich et al 2013 AEM
# Describes library prep
#Forward and reverse read have full overlap
# 515F and 806R

Assess trimmed read quality

# Plot the 12 random samples after QC
forward_filteredQual_plot_14 <- 
  plotQualityProfile(filtered_forward_reads[random_samples]) + 
  labs(title = "Trimmed Forward Read Quality")

reverse_filteredQual_plot_14 <- 
  plotQualityProfile(filtered_reverse_reads[random_samples]) + 
  labs(title = "Trimmed Reverse Read Quality")

# Put the two plots together 
forward_filteredQual_plot_14 + reverse_filteredQual_plot_14

Aggregated Trimmed Plots

#Aggregate all QC plots
plotQualityProfile(filtered_forward_reads, aggregate = TRUE) + 
 plotQualityProfile(filtered_reverse_reads, aggregate = TRUE)

Stats on read output from filterAndTrim

#Make output into dataframe
filtered_df <- as.data.frame(filtered_reads)
head(filtered_df)
##                                 reads.in reads.out
## 568_4_S245_L001_R1_001.fastq.gz    52502     25983
## 568_5_S246_L001_R1_001.fastq.gz    38610     18739
## 581_5_S247_L001_R1_001.fastq.gz    35144     17699
## 611_5_S248_L001_R1_001.fastq.gz    30511     15091
## E03_5_S146_L001_R1_001.fastq.gz    16548      7855
## E05_5_S147_L001_R1_001.fastq.gz   107092     47810
# calculate some stats
filtered_df %>%
  reframe(median_reads_in = median(reads.in),
          median_reads_out = median(reads.out),
          median_percent_retained = (median(reads.out)/median(reads.in)))
##   median_reads_in median_reads_out median_percent_retained
## 1           45556          21591.5               0.4739551

Error Modelling

Note every sequencing run needs to be run separately! The error model MUST be run separately on each illumina dataset. If you’d like to combine the datasets from multiple sequencing runs, you’ll need to do the exact same filterAndTrim() step AND, very importantly, you’ll need to have the same primer and ASV length expected by the output.

Infer error rates for all possible transitions within purines and pyrimidines (A<>G or C<>T) and transversions between all purine and pyrimidine combinations.

Error model is learned by alternating estimation of the error rates and inference of sample composition until they converge.

  1. Starts with the assumption that the error rates are the maximum (takes the most abundant sequence (“center”) and assumes it’s the only sequence not caused by errors).
  2. Compares the other sequences to the most abundant sequence.
  3. Uses at most 108 nucleotides for the error estimation.
  4. Uses parametric error estimation function of loess fit on the observed error rates.
#Forward reads
error_forward_reads <-
  learnErrors(filtered_forward_reads, multithread = TRUE)
## 75151368 total bases in 318438 reads from 14 samples will be used for learning the error rates.
#Plot forward reads errors
forward_error_plot <-
  plotErrors(error_forward_reads, nominalQ = TRUE) + 
  labs(title = "Forward Read Error Model")

#Reverse reads
error_reverse_reads <-
  learnErrors(filtered_reverse_reads, multithread = TRUE)
## 70374798 total bases in 318438 reads from 14 samples will be used for learning the error rates.
#Plot reverse reads errors
reverse_error_plot <-
  plotErrors(error_reverse_reads, nominalQ = TRUE) +
  labs(title = "Reverse Read Error Model")

# Put the two plots together
forward_error_plot + reverse_error_plot
## Warning in scale_y_log10(): log-10 transformation introduced infinite values.
## log-10 transformation introduced infinite values.

Both the forward and reverse reads seem to fit the error models pretty well. A2T, C2T, G2T, and T2G seem to have slightly higher errors than expected.

  • The error rates for each possible transition (A→C, A→G, …) are shown in the plot above.

Details of the plot: - Points: The observed error rates for each consensus quality score.
- Black line: Estimated error rates after convergence of the machine-learning algorithm.
- Red line: The error rates expected under the nominal definition of the Q-score.

Similar to what is mentioned in the dada2 tutorial: the estimated error rates (black line) are a “reasonably good” fit to the observed rates (points), and the error rates drop with increased quality as expected. We can now infer ASVs!

Infer ASVs

An important note: This process occurs separately on forward and reverse reads! This is quite a different approach from how OTUs are identified in Mothur and also from UCHIME, oligotyping, and other OTU, MED, and ASV approaches.

#Infer forward ASVs
dada_forward <- dada(filtered_forward_reads, 
                     err = error_forward_reads, multithread = TRUE)
## Sample 1 - 25983 reads in 2046 unique sequences.
## Sample 2 - 18739 reads in 3640 unique sequences.
## Sample 3 - 17699 reads in 3523 unique sequences.
## Sample 4 - 15091 reads in 2923 unique sequences.
## Sample 5 - 7855 reads in 2138 unique sequences.
## Sample 6 - 47810 reads in 9938 unique sequences.
## Sample 7 - 45242 reads in 3730 unique sequences.
## Sample 8 - 28698 reads in 5804 unique sequences.
## Sample 9 - 40995 reads in 6048 unique sequences.
## Sample 10 - 37948 reads in 7660 unique sequences.
## Sample 11 - 7645 reads in 1650 unique sequences.
## Sample 12 - 24444 reads in 6346 unique sequences.
## Sample 13 - 36 reads in 25 unique sequences.
## Sample 14 - 253 reads in 168 unique sequences.
typeof(dada_forward)
## [1] "list"
#Infer reverse ASVs
dada_reverse <- dada(filtered_reverse_reads, 
                     err = error_reverse_reads, multithread = TRUE)
## Sample 1 - 25983 reads in 3883 unique sequences.
## Sample 2 - 18739 reads in 6398 unique sequences.
## Sample 3 - 17699 reads in 6328 unique sequences.
## Sample 4 - 15091 reads in 5140 unique sequences.
## Sample 5 - 7855 reads in 3009 unique sequences.
## Sample 6 - 47810 reads in 14095 unique sequences.
## Sample 7 - 45242 reads in 6052 unique sequences.
## Sample 8 - 28698 reads in 8292 unique sequences.
## Sample 9 - 40995 reads in 9295 unique sequences.
## Sample 10 - 37948 reads in 11519 unique sequences.
## Sample 11 - 7645 reads in 2285 unique sequences.
## Sample 12 - 24444 reads in 8863 unique sequences.
## Sample 13 - 36 reads in 28 unique sequences.
## Sample 14 - 253 reads in 191 unique sequences.
# Inspect 
dada_reverse[1]
## $`568_4_R2_filtered.fastq.gz`
## dada-class: object describing DADA2 denoising results
## 56 sequence variants were inferred from 3883 input unique sequences.
## Key parameters: OMEGA_A = 1e-40, OMEGA_C = 1e-40, BAND_SIZE = 16
dada_reverse[13]
## $Neg1_S148_R2_filtered.fastq.gz
## dada-class: object describing DADA2 denoising results
## 5 sequence variants were inferred from 28 input unique sequences.
## Key parameters: OMEGA_A = 1e-40, OMEGA_C = 1e-40, BAND_SIZE = 16

Sample 13 and 14 have low reads. These are our negative controls.

Merge Forward & Reverse ASVs

Now, merge the forward and reverse ASVs into contigs.

# merge forward and reverse ASVs
merged_ASVs <- mergePairs(dada_forward, filtered_forward_reads, 
                          dada_reverse, filtered_reverse_reads, 
                          verbose = TRUE)
## 24787 paired-reads (in 52 unique pairings) successfully merged out of 25682 (in 70 pairings) input.
## 17389 paired-reads (in 438 unique pairings) successfully merged out of 17992 (in 529 pairings) input.
## 16071 paired-reads (in 443 unique pairings) successfully merged out of 16906 (in 594 pairings) input.
## 13943 paired-reads (in 324 unique pairings) successfully merged out of 14503 (in 405 pairings) input.
## 7131 paired-reads (in 261 unique pairings) successfully merged out of 7379 (in 318 pairings) input.
## 45847 paired-reads (in 614 unique pairings) successfully merged out of 46839 (in 767 pairings) input.
## 44201 paired-reads (in 63 unique pairings) successfully merged out of 45084 (in 96 pairings) input.
## 27674 paired-reads (in 415 unique pairings) successfully merged out of 28100 (in 481 pairings) input.
## 39686 paired-reads (in 385 unique pairings) successfully merged out of 40343 (in 485 pairings) input.
## 36730 paired-reads (in 526 unique pairings) successfully merged out of 37260 (in 628 pairings) input.
## 7155 paired-reads (in 134 unique pairings) successfully merged out of 7272 (in 163 pairings) input.
## 22883 paired-reads (in 491 unique pairings) successfully merged out of 23642 (in 613 pairings) input.
## 12 paired-reads (in 5 unique pairings) successfully merged out of 12 (in 5 pairings) input.
## 122 paired-reads (in 26 unique pairings) successfully merged out of 140 (in 33 pairings) input.
# Evaluate output
typeof(merged_ASVs)
## [1] "list"
length(merged_ASVs)
## [1] 14
names(merged_ASVs)
##  [1] "568_4_R1_filtered.fastq.gz"     "568_5_R1_filtered.fastq.gz"    
##  [3] "581_5_R1_filtered.fastq.gz"     "611_5_R1_filtered.fastq.gz"    
##  [5] "E03_5_R1_filtered.fastq.gz"     "E05_5_R1_filtered.fastq.gz"    
##  [7] "E1_4_R1_filtered.fastq.gz"      "E1_5_R1_filtered.fastq.gz"     
##  [9] "E2_4_R1_filtered.fastq.gz"      "E2_5_R1_filtered.fastq.gz"     
## [11] "E3_4_R1_filtered.fastq.gz"      "E3_5_R1_filtered.fastq.gz"     
## [13] "Neg1_S148_R1_filtered.fastq.gz" "Neg2_S249_R1_filtered.fastq.gz"

Create Raw ASV Count Table

# Create the ASV Count Table 
raw_ASV_table <- makeSequenceTable(merged_ASVs)

# Write out the file to data/01_DADA2


# Check the type and dimensions of the data
dim(raw_ASV_table)
## [1]   14 1806
class(raw_ASV_table)
## [1] "matrix" "array"
typeof(raw_ASV_table)
## [1] "integer"
# Inspect the distribution of sequence lengths of all ASVs in dataset 
table(nchar(getSequences(raw_ASV_table)))
## 
##  236  237  291  305 
## 1800    3    1    2
# Inspect the distribution of sequence lengths of all ASVs in dataset 
# AFTER TRIM
data.frame(Seq_Length = nchar(getSequences(raw_ASV_table))) %>%
  ggplot(aes(x = Seq_Length )) + 
  geom_histogram() + 
  labs(title = "Raw distribution of ASV length")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

###################################################
###################################################
# TRIM THE ASVS
# Let's trim the ASVs to only be the right size, which is 236.
# 236 originates from our expected amplicon being trimmed due to low quality at the ends

# We will allow for only one length
raw_ASV_table_trimmed <- raw_ASV_table[,nchar(colnames(raw_ASV_table)) %in% 236]

# Inspect the distribution of sequence lengths of all ASVs in dataset 
table(nchar(getSequences(raw_ASV_table_trimmed)))
## 
##  236 
## 1800
# What proportion is left of the sequences? 
sum(raw_ASV_table_trimmed)/sum(raw_ASV_table)
## [1] 0.9998024
# Inspect the distribution of sequence lengths of all ASVs in dataset 
# AFTER TRIM
data.frame(Seq_Length = nchar(getSequences(raw_ASV_table_trimmed))) %>%
  ggplot(aes(x = Seq_Length )) + 
  geom_histogram() + 
  labs(title = "Trimmed distribution of ASV length")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Remove Chimeras

Sometimes chimeras arise in our workflow.

Chimeric sequences are artificial sequences formed by the combination of two or more distinct biological sequences. These chimeric sequences can arise during the polymerase chain reaction (PCR) amplification step of the 16S rRNA gene, where fragments from different templates can be erroneously joined together.

Chimera removal is an essential step in the analysis of 16S sequencing data to improve the accuracy of downstream analyses, such as taxonomic assignment and diversity assessment. It helps to avoid the inclusion of misleading or spurious sequences that could lead to incorrect biological interpretations.

# Remove the chimeras in the raw ASV table
noChimeras_ASV_table <- removeBimeraDenovo(raw_ASV_table_trimmed, 
                                           method="consensus", 
                                           multithread=TRUE, verbose=TRUE)
## Identified 36 bimeras out of 1800 input sequences.
# Check the dimensions
dim(noChimeras_ASV_table)
## [1]   14 1764
# What proportion is left of the sequences? 
sum(noChimeras_ASV_table)/sum(raw_ASV_table_trimmed)
## [1] 0.9936489
sum(noChimeras_ASV_table)/sum(raw_ASV_table)
## [1] 0.9934526
# Plot it 
data.frame(Seq_Length_NoChim = nchar(getSequences(noChimeras_ASV_table))) %>%
  ggplot(aes(x = Seq_Length_NoChim )) + 
  geom_histogram()+ 
  labs(title = "Trimmed + Chimera Removal distribution of ASV length")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Track the read counts

Here, we will look at the number of reads that were lost in the filtering, denoising, merging, and chimera removal.

# A little function to identify number seqs 
getN <- function(x) sum(getUniques(x))

# Make the table to track the seqs 
track <- cbind(filtered_reads, 
               sapply(dada_forward, getN),
               sapply(dada_reverse, getN),
               sapply(merged_ASVs, getN),
               rowSums(noChimeras_ASV_table))

head(track)
##                                 reads.in reads.out                        
## 568_4_S245_L001_R1_001.fastq.gz    52502     25983 25713 25724 24787 24522
## 568_5_S246_L001_R1_001.fastq.gz    38610     18739 18165 18254 17389 17305
## 581_5_S247_L001_R1_001.fastq.gz    35144     17699 17222 17193 16071 15963
## 611_5_S248_L001_R1_001.fastq.gz    30511     15091 14673 14708 13943 13898
## E03_5_S146_L001_R1_001.fastq.gz    16548      7855  7500  7528  7131  7131
## E05_5_S147_L001_R1_001.fastq.gz   107092     47810 47106 47221 45847 45378
# Update column names to be more informative (most are missing at the moment!)
colnames(track) <- c("input", "filtered", "denoisedF", "denoisedR", "merged", "nochim")
rownames(track) <- samples

# Generate a dataframe to track the reads through our DADA2 pipeline
track_counts_df <- 
  track %>%
  # make it a dataframe
  as.data.frame() %>%
  rownames_to_column(var = "names") %>%
  mutate(perc_reads_retained = 100 * nochim / input)

# Visualize it in table format 
DT::datatable(track_counts_df)
# Plot it!
track_counts_df %>%
  pivot_longer(input:nochim, names_to = "read_type", values_to = "num_reads") %>%
  mutate(read_type = fct_relevel(read_type, 
                                 "input", "filtered", "denoisedF", "denoisedR", "merged", "nochim")) %>%
  ggplot(aes(x = read_type, y = num_reads, fill = read_type)) + 
  geom_line(aes(group = names), color = "grey") + 
  geom_point(shape = 21, size = 3, alpha = 0.8) + 
  scale_fill_brewer(palette = "Spectral") + 
  labs(x = "Filtering Step", y = "Number of Sequences") + 
  theme_bw()

Assign Taxonomy

Here, we will use the silva database version 138!

taxa_train <- 
  assignTaxonomy(noChimeras_ASV_table, 
                 "/workdir/in_class_data/taxonomy/silva_nr99_v138.1_train_set.fa.gz", 
                 multithread=TRUE)

taxa_addSpecies <- 
  addSpecies(taxa_train, 
             "/workdir/in_class_data/taxonomy/silva_species_assignment_v138.1.fa.gz")

# Inspect the taxonomy 
taxa_print <- taxa_addSpecies # Removing sequence rownames for display only
rownames(taxa_print) <- NULL
#View(taxa_print)

Prepare the data for export!

1. ASV Table

Below, we will prepare the following:

  1. Two ASV Count tables:
    1. With ASV seqs: ASV headers include the entire ASV sequence ~250bps.
    2. with ASV names: This includes re-written and shortened headers like ASV_1, ASV_2, etc, which will match the names in our fasta file below.
  2. ASV_fastas: A fasta file that we can use to build a tree for phylogenetic analyses (e.g. phylogenetic alpha diversity metrics or UNIFRAC dissimilarty).

Finalize ASV Count Tables

########### 2. COUNT TABLE ###############
############## Modify the ASV names and then save a fasta file!  ############## 
# Give headers more manageable names
# First pull the ASV sequences
asv_seqs <- colnames(noChimeras_ASV_table)
asv_seqs[1:5]
## [1] "TGCAAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGCAGGCGGTCTATTAAGTCAGTGGTGAAAGTTTGCAGCTTAACTGTAAAAGTGCCATTGATACTGGTAGACTTGAGTGTGGTGAAGGTAGGCGGAATTCGTGGTGTAGCGGTGAAATGCATAGATACCACGAAGAACACCGATAGCGAAGGCAGCTTACTGTACCATTACTGACGCTGAGGCACGAAAGCGTGGGGAG"
## [2] "GGCTAGCGTTATCCGGAATTACTGGGCGTAAAGGGTGCGTAGGTGGTTTCTTAAGTCAGAGGTGAAAGGCTACGGCTCAACCGTAGTAAGCCTTTGAAACTGAGAAACTTGAGTGCAGGAGAGGAGAGTAGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAATACCAGTTGCGAAGGCGGCTCTCTGGACTGTAACTGACACTGAGGCACGAAAGCGTGGGGAGC"
## [3] "TGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGTGCGTAGGCGGTCTGATAAGTTGGATGTGAAAGCCCCGGGCTCAACCTGGGAACTGCATCCAAAACTGTCAGACTTGAATGCGGAAGAGGGGTGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACATCAGTGGCGAAGGCGACACCCTGGTCTGACATTGACGCTGAGGTACGAAAGCGTGGGGAG"
## [4] "TGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGTGCGTAGGCGGCCTTTTAAGTTGGATGTGAAAGCCCCGGGCTTAACCTGGGAACGGCATCCAAAACTGAGAGGCTTGAATGCGGAAGAGGGGTGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACATCAGTGGCGAAGGCGACACCCTGGTCTGACATTGACGCTGAGGTACGAAAGCGTGGGGAG"
## [5] "TGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGTGCGTAGGCGGTCTGATAAGTTGGATGTGAAAGCCCCGGGCTCAACCTGGGAACTGCATCCAAAACTGTCAGACTTGAGTGCGGAAGAGGGGTGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACATCAGTGGCGAAGGCGACACCCTGGTCTGACACTGACGCTGAGGTACGAAAGCGTGGGGAG"
# make headers for our ASV seq fasta file, which will be our asv names
asv_headers <- vector(dim(noChimeras_ASV_table)[2], mode = "character")
asv_headers[1:5]
## [1] "" "" "" "" ""
# loop through vector and fill it in with ASV names 
for (i in 1:dim(noChimeras_ASV_table)[2]) {
  asv_headers[i] <- paste(">ASV", i, sep = "_")
}

# intitution check
asv_headers[1:5]
## [1] ">ASV_1" ">ASV_2" ">ASV_3" ">ASV_4" ">ASV_5"
##### Rename ASVs in table then write out our ASV fasta file! 
#View(noChimeras_ASV_table)
asv_tab <- t(noChimeras_ASV_table)
#View(asv_tab)

## Rename our asvs! 
row.names(asv_tab) <- sub(">", "", asv_headers)
#View(asv_tab)

2. Taxonomy Table

# Inspect the taxonomy table
#View(taxa_addSpecies)

##### Prepare tax table 
# Add the ASV sequences from the rownames to a column 
new_tax_tab <- 
  taxa_addSpecies%>%
  as.data.frame() %>%
  rownames_to_column(var = "ASVseqs") 
head(new_tax_tab)
##                                                                                                                                                                                                                                        ASVseqs
## 1 TGCAAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGCAGGCGGTCTATTAAGTCAGTGGTGAAAGTTTGCAGCTTAACTGTAAAAGTGCCATTGATACTGGTAGACTTGAGTGTGGTGAAGGTAGGCGGAATTCGTGGTGTAGCGGTGAAATGCATAGATACCACGAAGAACACCGATAGCGAAGGCAGCTTACTGTACCATTACTGACGCTGAGGCACGAAAGCGTGGGGAG
## 2 GGCTAGCGTTATCCGGAATTACTGGGCGTAAAGGGTGCGTAGGTGGTTTCTTAAGTCAGAGGTGAAAGGCTACGGCTCAACCGTAGTAAGCCTTTGAAACTGAGAAACTTGAGTGCAGGAGAGGAGAGTAGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAATACCAGTTGCGAAGGCGGCTCTCTGGACTGTAACTGACACTGAGGCACGAAAGCGTGGGGAGC
## 3 TGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGTGCGTAGGCGGTCTGATAAGTTGGATGTGAAAGCCCCGGGCTCAACCTGGGAACTGCATCCAAAACTGTCAGACTTGAATGCGGAAGAGGGGTGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACATCAGTGGCGAAGGCGACACCCTGGTCTGACATTGACGCTGAGGTACGAAAGCGTGGGGAG
## 4 TGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGTGCGTAGGCGGCCTTTTAAGTTGGATGTGAAAGCCCCGGGCTTAACCTGGGAACGGCATCCAAAACTGAGAGGCTTGAATGCGGAAGAGGGGTGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACATCAGTGGCGAAGGCGACACCCTGGTCTGACATTGACGCTGAGGTACGAAAGCGTGGGGAG
## 5 TGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGTGCGTAGGCGGTCTGATAAGTTGGATGTGAAAGCCCCGGGCTCAACCTGGGAACTGCATCCAAAACTGTCAGACTTGAGTGCGGAAGAGGGGTGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACATCAGTGGCGAAGGCGACACCCTGGTCTGACACTGACGCTGAGGTACGAAAGCGTGGGGAG
## 6 TGCGAGCGTTAATCGGAATTACTGGGCGTAAAGCGTGCGTAGGCGGCCTTTTAAGTTGGATGTGAAAGCCCCGGGCTCAACCTGGGAACGGCATCCAAAACTGGGAGGCTCGAGTGCGGAAGAGGAGTGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACACCAGTGGCGAAGGCGACACTCTGGTCTGACACTGACGCTGAGGTACGAAAGCGTGGGGAG
##    Kingdom         Phylum               Class
## 1 Bacteria   Bacteroidota         Bacteroidia
## 2 Bacteria     Firmicutes          Clostridia
## 3 Bacteria Proteobacteria Gammaproteobacteria
## 4 Bacteria Proteobacteria Gammaproteobacteria
## 5 Bacteria Proteobacteria Gammaproteobacteria
## 6 Bacteria Proteobacteria Gammaproteobacteria
##                                 Order                Family          Genus
## 1                        Cytophagales     Cyclobacteriaceae    Aureibacter
## 2 Peptostreptococcales-Tissierellales Peptostreptococcaceae     Romboutsia
## 3                     Pseudomonadales   Endozoicomonadaceae Endozoicomonas
## 4                     Pseudomonadales   Endozoicomonadaceae Endozoicomonas
## 5                     Pseudomonadales   Endozoicomonadaceae Endozoicomonas
## 6                     Pseudomonadales   Endozoicomonadaceae Endozoicomonas
##        Species
## 1         <NA>
## 2 sedimentorum
## 3         <NA>
## 4         <NA>
## 5         <NA>
## 6         <NA>
# intution check 
stopifnot(new_tax_tab$ASVseqs == colnames(noChimeras_ASV_table))

# Now let's add the ASV names 
rownames(new_tax_tab) <- rownames(asv_tab)
head(new_tax_tab)
##                                                                                                                                                                                                                                            ASVseqs
## ASV_1 TGCAAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGCAGGCGGTCTATTAAGTCAGTGGTGAAAGTTTGCAGCTTAACTGTAAAAGTGCCATTGATACTGGTAGACTTGAGTGTGGTGAAGGTAGGCGGAATTCGTGGTGTAGCGGTGAAATGCATAGATACCACGAAGAACACCGATAGCGAAGGCAGCTTACTGTACCATTACTGACGCTGAGGCACGAAAGCGTGGGGAG
## ASV_2 GGCTAGCGTTATCCGGAATTACTGGGCGTAAAGGGTGCGTAGGTGGTTTCTTAAGTCAGAGGTGAAAGGCTACGGCTCAACCGTAGTAAGCCTTTGAAACTGAGAAACTTGAGTGCAGGAGAGGAGAGTAGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAATACCAGTTGCGAAGGCGGCTCTCTGGACTGTAACTGACACTGAGGCACGAAAGCGTGGGGAGC
## ASV_3 TGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGTGCGTAGGCGGTCTGATAAGTTGGATGTGAAAGCCCCGGGCTCAACCTGGGAACTGCATCCAAAACTGTCAGACTTGAATGCGGAAGAGGGGTGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACATCAGTGGCGAAGGCGACACCCTGGTCTGACATTGACGCTGAGGTACGAAAGCGTGGGGAG
## ASV_4 TGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGTGCGTAGGCGGCCTTTTAAGTTGGATGTGAAAGCCCCGGGCTTAACCTGGGAACGGCATCCAAAACTGAGAGGCTTGAATGCGGAAGAGGGGTGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACATCAGTGGCGAAGGCGACACCCTGGTCTGACATTGACGCTGAGGTACGAAAGCGTGGGGAG
## ASV_5 TGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGTGCGTAGGCGGTCTGATAAGTTGGATGTGAAAGCCCCGGGCTCAACCTGGGAACTGCATCCAAAACTGTCAGACTTGAGTGCGGAAGAGGGGTGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACATCAGTGGCGAAGGCGACACCCTGGTCTGACACTGACGCTGAGGTACGAAAGCGTGGGGAG
## ASV_6 TGCGAGCGTTAATCGGAATTACTGGGCGTAAAGCGTGCGTAGGCGGCCTTTTAAGTTGGATGTGAAAGCCCCGGGCTCAACCTGGGAACGGCATCCAAAACTGGGAGGCTCGAGTGCGGAAGAGGAGTGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACACCAGTGGCGAAGGCGACACTCTGGTCTGACACTGACGCTGAGGTACGAAAGCGTGGGGAG
##        Kingdom         Phylum               Class
## ASV_1 Bacteria   Bacteroidota         Bacteroidia
## ASV_2 Bacteria     Firmicutes          Clostridia
## ASV_3 Bacteria Proteobacteria Gammaproteobacteria
## ASV_4 Bacteria Proteobacteria Gammaproteobacteria
## ASV_5 Bacteria Proteobacteria Gammaproteobacteria
## ASV_6 Bacteria Proteobacteria Gammaproteobacteria
##                                     Order                Family          Genus
## ASV_1                        Cytophagales     Cyclobacteriaceae    Aureibacter
## ASV_2 Peptostreptococcales-Tissierellales Peptostreptococcaceae     Romboutsia
## ASV_3                     Pseudomonadales   Endozoicomonadaceae Endozoicomonas
## ASV_4                     Pseudomonadales   Endozoicomonadaceae Endozoicomonas
## ASV_5                     Pseudomonadales   Endozoicomonadaceae Endozoicomonas
## ASV_6                     Pseudomonadales   Endozoicomonadaceae Endozoicomonas
##            Species
## ASV_1         <NA>
## ASV_2 sedimentorum
## ASV_3         <NA>
## ASV_4         <NA>
## ASV_5         <NA>
## ASV_6         <NA>
### Final prep of tax table. Add new column with ASV names 
asv_tax <- 
  new_tax_tab %>%
  # add rownames from count table for phyloseq handoff
  mutate(ASV = rownames(asv_tab)) %>%
  # Resort the columns with select
  dplyr::select(Kingdom, Phylum, Class, Order, Family, Genus, Species, ASV, ASVseqs)

head(asv_tax)
##        Kingdom         Phylum               Class
## ASV_1 Bacteria   Bacteroidota         Bacteroidia
## ASV_2 Bacteria     Firmicutes          Clostridia
## ASV_3 Bacteria Proteobacteria Gammaproteobacteria
## ASV_4 Bacteria Proteobacteria Gammaproteobacteria
## ASV_5 Bacteria Proteobacteria Gammaproteobacteria
## ASV_6 Bacteria Proteobacteria Gammaproteobacteria
##                                     Order                Family          Genus
## ASV_1                        Cytophagales     Cyclobacteriaceae    Aureibacter
## ASV_2 Peptostreptococcales-Tissierellales Peptostreptococcaceae     Romboutsia
## ASV_3                     Pseudomonadales   Endozoicomonadaceae Endozoicomonas
## ASV_4                     Pseudomonadales   Endozoicomonadaceae Endozoicomonas
## ASV_5                     Pseudomonadales   Endozoicomonadaceae Endozoicomonas
## ASV_6                     Pseudomonadales   Endozoicomonadaceae Endozoicomonas
##            Species   ASV
## ASV_1         <NA> ASV_1
## ASV_2 sedimentorum ASV_2
## ASV_3         <NA> ASV_3
## ASV_4         <NA> ASV_4
## ASV_5         <NA> ASV_5
## ASV_6         <NA> ASV_6
##                                                                                                                                                                                                                                            ASVseqs
## ASV_1 TGCAAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGCAGGCGGTCTATTAAGTCAGTGGTGAAAGTTTGCAGCTTAACTGTAAAAGTGCCATTGATACTGGTAGACTTGAGTGTGGTGAAGGTAGGCGGAATTCGTGGTGTAGCGGTGAAATGCATAGATACCACGAAGAACACCGATAGCGAAGGCAGCTTACTGTACCATTACTGACGCTGAGGCACGAAAGCGTGGGGAG
## ASV_2 GGCTAGCGTTATCCGGAATTACTGGGCGTAAAGGGTGCGTAGGTGGTTTCTTAAGTCAGAGGTGAAAGGCTACGGCTCAACCGTAGTAAGCCTTTGAAACTGAGAAACTTGAGTGCAGGAGAGGAGAGTAGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAATACCAGTTGCGAAGGCGGCTCTCTGGACTGTAACTGACACTGAGGCACGAAAGCGTGGGGAGC
## ASV_3 TGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGTGCGTAGGCGGTCTGATAAGTTGGATGTGAAAGCCCCGGGCTCAACCTGGGAACTGCATCCAAAACTGTCAGACTTGAATGCGGAAGAGGGGTGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACATCAGTGGCGAAGGCGACACCCTGGTCTGACATTGACGCTGAGGTACGAAAGCGTGGGGAG
## ASV_4 TGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGTGCGTAGGCGGCCTTTTAAGTTGGATGTGAAAGCCCCGGGCTTAACCTGGGAACGGCATCCAAAACTGAGAGGCTTGAATGCGGAAGAGGGGTGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACATCAGTGGCGAAGGCGACACCCTGGTCTGACATTGACGCTGAGGTACGAAAGCGTGGGGAG
## ASV_5 TGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGTGCGTAGGCGGTCTGATAAGTTGGATGTGAAAGCCCCGGGCTCAACCTGGGAACTGCATCCAAAACTGTCAGACTTGAGTGCGGAAGAGGGGTGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACATCAGTGGCGAAGGCGACACCCTGGTCTGACACTGACGCTGAGGTACGAAAGCGTGGGGAG
## ASV_6 TGCGAGCGTTAATCGGAATTACTGGGCGTAAAGCGTGCGTAGGCGGCCTTTTAAGTTGGATGTGAAAGCCCCGGGCTCAACCTGGGAACGGCATCCAAAACTGGGAGGCTCGAGTGCGGAAGAGGAGTGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACACCAGTGGCGAAGGCGACACTCTGGTCTGACACTGACGCTGAGGTACGAAAGCGTGGGGAG
# Intution check
stopifnot(asv_tax$ASV == rownames(asv_tax), rownames(asv_tax) == rownames(asv_tab))

Write 01_DADA2 files

Now, we will write the files! We will write the following to the data/01_DADA2/ folder. We will save both as files that could be submitted as supplements AND as .RData objects for easy loading into the next steps into R.:

  1. ASV_counts.tsv: ASV count table that has ASV names that are re-written and shortened headers like ASV_1, ASV_2, etc, which will match the names in our fasta file below. This will also be saved as data/01_DADA2/ASV_counts.RData.
  2. ASV_counts_withSeqNames.tsv: This is generated with the data object in this file known as noChimeras_ASV_table. ASV headers include the entire ASV sequence ~250bps. In addition, we will save this as a .RData object as data/01_DADA2/noChimeras_ASV_table.RData as we will use this data in analysis/02_Taxonomic_Assignment.Rmd to assign the taxonomy from the sequence headers.
  3. ASVs.fasta: A fasta file output of the ASV names from ASV_counts.tsv and the sequences from the ASVs in ASV_counts_withSeqNames.tsv. A fasta file that we can use to build a tree for phylogenetic analyses (e.g. phylogenetic alpha diversity metrics or UNIFRAC dissimilarty).
  4. We will also make a copy of ASVs.fasta in data/02_TaxAss_FreshTrain/ to be used for the taxonomy classification in the next step in the workflow.
  5. Write out the taxonomy table
  6. track_read_counts.RData: To track how many reads we lost throughout our workflow that could be used and plotted later. We will add this to the metadata in analysis/02_Taxonomic_Assignment.Rmd.
# FIRST, we will save our output as regular files, which will be useful later on. 
# Save to regular .tsv file 
# Write BOTH the modified and unmodified ASV tables to a file!
# Write count table with ASV numbered names (e.g. ASV_1, ASV_2, etc)
write.table(asv_tab, "data/01_DADA2/ASV_counts.tsv", sep = "\t", quote = FALSE, col.names = NA)
# Write count table with ASV sequence names
write.table(noChimeras_ASV_table, "data/01_DADA2/ASV_counts_withSeqNames.tsv", sep = "\t", quote = FALSE, col.names = NA)
# Write out the fasta file for reference later on for what seq matches what ASV
asv_fasta <- c(rbind(asv_headers, asv_seqs))
# Save to a file!
write(asv_fasta, "data/01_DADA2/ASVs.fasta")


# SECOND, let's save the taxonomy tables 
# Write the table 
write.table(asv_tax, "data/01_DADA2/ASV_taxonomy.tsv", sep = "\t", quote = FALSE, col.names = NA)


# THIRD, let's save to a RData object 
# Each of these files will be used in the analysis/02_Taxonomic_Assignment
# RData objects are for easy loading :) 
save(noChimeras_ASV_table, file = "data/01_DADA2/noChimeras_ASV_table.RData")
save(asv_tab, file = "data/01_DADA2/ASV_counts.RData")
# And save the track_counts_df a R object, which we will merge with metadata information in the next step of the analysis in nalysis/02_Taxonomic_Assignment. 
save(track_counts_df, file = "data/01_DADA2/track_read_counts.RData")

##Session information

#Ensure reproducibility
devtools::session_info()
## ─ Session info ───────────────────────────────────────────────────────────────
##  setting  value
##  version  R version 4.3.2 (2023-10-31)
##  os       Rocky Linux 9.0 (Blue Onyx)
##  system   x86_64, linux-gnu
##  ui       X11
##  language (EN)
##  collate  en_US.UTF-8
##  ctype    en_US.UTF-8
##  tz       America/New_York
##  date     2024-03-14
##  pandoc   3.1.1 @ /usr/lib/rstudio-server/bin/quarto/bin/tools/ (via rmarkdown)
## 
## ─ Packages ───────────────────────────────────────────────────────────────────
##  package              * version    date (UTC) lib source
##  abind                  1.4-5      2016-07-21 [2] CRAN (R 4.3.2)
##  ade4                   1.7-22     2023-02-06 [1] CRAN (R 4.3.2)
##  ape                    5.7-1      2023-03-13 [2] CRAN (R 4.3.2)
##  Biobase                2.62.0     2023-10-24 [2] Bioconductor
##  BiocGenerics           0.48.1     2023-11-01 [2] Bioconductor
##  BiocParallel           1.36.0     2023-10-24 [2] Bioconductor
##  biomformat             1.30.0     2023-10-24 [1] Bioconductor
##  Biostrings             2.70.1     2023-10-25 [2] Bioconductor
##  bitops                 1.0-7      2021-04-24 [2] CRAN (R 4.3.2)
##  bslib                  0.5.1      2023-08-11 [2] CRAN (R 4.3.2)
##  cachem                 1.0.8      2023-05-01 [2] CRAN (R 4.3.2)
##  callr                  3.7.3      2022-11-02 [2] CRAN (R 4.3.2)
##  cli                    3.6.1      2023-03-23 [2] CRAN (R 4.3.2)
##  cluster                2.1.4      2022-08-22 [2] CRAN (R 4.3.2)
##  codetools              0.2-19     2023-02-01 [2] CRAN (R 4.3.2)
##  colorspace             2.1-0      2023-01-23 [2] CRAN (R 4.3.2)
##  crayon                 1.5.2      2022-09-29 [2] CRAN (R 4.3.2)
##  crosstalk              1.2.0      2021-11-04 [2] CRAN (R 4.3.2)
##  dada2                * 1.30.0     2023-10-24 [1] Bioconductor
##  data.table             1.14.8     2023-02-17 [2] CRAN (R 4.3.2)
##  DelayedArray           0.28.0     2023-10-24 [2] Bioconductor
##  deldir                 1.0-9      2023-05-17 [2] CRAN (R 4.3.2)
##  devtools             * 2.4.4      2022-07-20 [2] CRAN (R 4.2.1)
##  digest                 0.6.33     2023-07-07 [2] CRAN (R 4.3.2)
##  dplyr                * 1.1.3      2023-09-03 [2] CRAN (R 4.3.2)
##  DT                   * 0.32       2024-02-19 [1] CRAN (R 4.3.2)
##  ellipsis               0.3.2      2021-04-29 [2] CRAN (R 4.3.2)
##  evaluate               0.23       2023-11-01 [2] CRAN (R 4.3.2)
##  fansi                  1.0.5      2023-10-08 [2] CRAN (R 4.3.2)
##  farver                 2.1.1      2022-07-06 [2] CRAN (R 4.3.2)
##  fastmap                1.1.1      2023-02-24 [2] CRAN (R 4.3.2)
##  forcats              * 1.0.0      2023-01-29 [1] CRAN (R 4.3.2)
##  foreach                1.5.2      2022-02-02 [2] CRAN (R 4.3.2)
##  fs                     1.6.3      2023-07-20 [2] CRAN (R 4.3.2)
##  generics               0.1.3      2022-07-05 [2] CRAN (R 4.3.2)
##  GenomeInfoDb           1.38.0     2023-10-24 [2] Bioconductor
##  GenomeInfoDbData       1.2.11     2023-11-07 [2] Bioconductor
##  GenomicAlignments      1.38.0     2023-10-24 [2] Bioconductor
##  GenomicRanges          1.54.1     2023-10-29 [2] Bioconductor
##  ggplot2              * 3.5.0      2024-02-23 [2] CRAN (R 4.3.2)
##  glue                   1.6.2      2022-02-24 [2] CRAN (R 4.3.2)
##  gtable                 0.3.4      2023-08-21 [2] CRAN (R 4.3.2)
##  highr                  0.10       2022-12-22 [2] CRAN (R 4.3.2)
##  hms                    1.1.3      2023-03-21 [1] CRAN (R 4.3.2)
##  htmltools              0.5.7      2023-11-03 [2] CRAN (R 4.3.2)
##  htmlwidgets            1.6.2      2023-03-17 [2] CRAN (R 4.3.2)
##  httpuv                 1.6.12     2023-10-23 [2] CRAN (R 4.3.2)
##  hwriter                1.3.2.1    2022-04-08 [1] CRAN (R 4.3.2)
##  igraph                 1.5.1      2023-08-10 [2] CRAN (R 4.3.2)
##  interp                 1.1-6      2024-01-26 [1] CRAN (R 4.3.2)
##  IRanges                2.36.0     2023-10-24 [2] Bioconductor
##  iterators              1.0.14     2022-02-05 [2] CRAN (R 4.3.2)
##  jpeg                   0.1-10     2022-11-29 [1] CRAN (R 4.3.2)
##  jquerylib              0.1.4      2021-04-26 [2] CRAN (R 4.3.2)
##  jsonlite               1.8.7      2023-06-29 [2] CRAN (R 4.3.2)
##  knitr                  1.45       2023-10-30 [2] CRAN (R 4.3.2)
##  labeling               0.4.3      2023-08-29 [2] CRAN (R 4.3.2)
##  later                  1.3.1      2023-05-02 [2] CRAN (R 4.3.2)
##  lattice                0.21-9     2023-10-01 [2] CRAN (R 4.3.2)
##  latticeExtra           0.6-30     2022-07-04 [1] CRAN (R 4.3.2)
##  lifecycle              1.0.3      2022-10-07 [2] CRAN (R 4.3.2)
##  lubridate            * 1.9.3      2023-09-27 [1] CRAN (R 4.3.2)
##  magrittr               2.0.3      2022-03-30 [2] CRAN (R 4.3.2)
##  MASS                   7.3-60     2023-05-04 [2] CRAN (R 4.3.2)
##  Matrix                 1.6-1.1    2023-09-18 [2] CRAN (R 4.3.2)
##  MatrixGenerics         1.14.0     2023-10-24 [2] Bioconductor
##  matrixStats            1.1.0      2023-11-07 [2] CRAN (R 4.3.2)
##  memoise                2.0.1      2021-11-26 [2] CRAN (R 4.3.2)
##  mgcv                   1.9-0      2023-07-11 [2] CRAN (R 4.3.2)
##  mime                   0.12       2021-09-28 [2] CRAN (R 4.3.2)
##  miniUI                 0.1.1.1    2018-05-18 [2] CRAN (R 4.3.2)
##  multtest               2.58.0     2023-10-24 [1] Bioconductor
##  munsell                0.5.0      2018-06-12 [2] CRAN (R 4.3.2)
##  nlme                   3.1-163    2023-08-09 [2] CRAN (R 4.3.2)
##  pacman                 0.5.1      2019-03-11 [1] CRAN (R 4.3.2)
##  patchwork            * 1.2.0.9000 2024-03-12 [1] Github (thomasp85/patchwork@d943757)
##  permute                0.9-7      2022-01-27 [1] CRAN (R 4.3.2)
##  phyloseq             * 1.41.1     2024-03-09 [1] Github (joey711/phyloseq@c260561)
##  pillar                 1.9.0      2023-03-22 [2] CRAN (R 4.3.2)
##  pkgbuild               1.4.2      2023-06-26 [2] CRAN (R 4.3.2)
##  pkgconfig              2.0.3      2019-09-22 [2] CRAN (R 4.3.2)
##  pkgload                1.3.3      2023-09-22 [2] CRAN (R 4.3.2)
##  plyr                   1.8.9      2023-10-02 [2] CRAN (R 4.3.2)
##  png                    0.1-8      2022-11-29 [2] CRAN (R 4.3.2)
##  prettyunits            1.2.0      2023-09-24 [2] CRAN (R 4.3.2)
##  processx               3.8.2      2023-06-30 [2] CRAN (R 4.3.2)
##  profvis                0.3.8      2023-05-02 [2] CRAN (R 4.3.2)
##  promises               1.2.1      2023-08-10 [2] CRAN (R 4.3.2)
##  ps                     1.7.5      2023-04-18 [2] CRAN (R 4.3.2)
##  purrr                * 1.0.2      2023-08-10 [2] CRAN (R 4.3.2)
##  R6                     2.5.1      2021-08-19 [2] CRAN (R 4.3.2)
##  RColorBrewer           1.1-3      2022-04-03 [2] CRAN (R 4.3.2)
##  Rcpp                 * 1.0.11     2023-07-06 [2] CRAN (R 4.3.2)
##  RcppParallel           5.1.7      2023-02-27 [2] CRAN (R 4.3.2)
##  RCurl                  1.98-1.13  2023-11-02 [2] CRAN (R 4.3.2)
##  readr                * 2.1.5      2024-01-10 [1] CRAN (R 4.3.2)
##  remotes                2.4.2.1    2023-07-18 [2] CRAN (R 4.3.2)
##  reshape2               1.4.4      2020-04-09 [2] CRAN (R 4.3.2)
##  rhdf5                  2.46.1     2023-11-29 [1] Bioconductor 3.18 (R 4.3.2)
##  rhdf5filters           1.14.1     2023-11-06 [1] Bioconductor
##  Rhdf5lib               1.24.2     2024-02-07 [1] Bioconductor 3.18 (R 4.3.2)
##  rlang                  1.1.2      2023-11-04 [2] CRAN (R 4.3.2)
##  rmarkdown              2.25       2023-09-18 [2] CRAN (R 4.3.2)
##  Rsamtools              2.18.0     2023-10-24 [2] Bioconductor
##  rstudioapi             0.15.0     2023-07-07 [2] CRAN (R 4.3.2)
##  S4Arrays               1.2.0      2023-10-24 [2] Bioconductor
##  S4Vectors              0.40.1     2023-10-26 [2] Bioconductor
##  sass                   0.4.7      2023-07-15 [2] CRAN (R 4.3.2)
##  scales                 1.3.0      2023-11-28 [2] CRAN (R 4.3.2)
##  sessioninfo            1.2.2      2021-12-06 [2] CRAN (R 4.3.2)
##  shiny                  1.7.5.1    2023-10-14 [2] CRAN (R 4.3.2)
##  ShortRead              1.60.0     2023-10-24 [1] Bioconductor
##  SparseArray            1.2.1      2023-11-05 [2] Bioconductor
##  stringi                1.7.12     2023-01-11 [2] CRAN (R 4.3.2)
##  stringr              * 1.5.0      2022-12-02 [2] CRAN (R 4.3.2)
##  SummarizedExperiment   1.32.0     2023-10-24 [2] Bioconductor
##  survival               3.5-7      2023-08-14 [2] CRAN (R 4.3.2)
##  tibble               * 3.2.1      2023-03-20 [2] CRAN (R 4.3.2)
##  tidyr                * 1.3.0      2023-01-24 [2] CRAN (R 4.3.2)
##  tidyselect             1.2.0      2022-10-10 [2] CRAN (R 4.3.2)
##  tidyverse            * 2.0.0      2023-02-22 [1] CRAN (R 4.3.2)
##  timechange             0.3.0      2024-01-18 [1] CRAN (R 4.3.2)
##  tzdb                   0.4.0      2023-05-12 [1] CRAN (R 4.3.2)
##  urlchecker             1.0.1      2021-11-30 [2] CRAN (R 4.3.2)
##  usethis              * 2.2.2      2023-07-06 [2] CRAN (R 4.3.2)
##  utf8                   1.2.4      2023-10-22 [2] CRAN (R 4.3.2)
##  vctrs                  0.6.4      2023-10-12 [2] CRAN (R 4.3.2)
##  vegan                  2.6-4      2022-10-11 [1] CRAN (R 4.3.2)
##  withr                  2.5.2      2023-10-30 [2] CRAN (R 4.3.2)
##  xfun                   0.41       2023-11-01 [2] CRAN (R 4.3.2)
##  xtable                 1.8-4      2019-04-21 [2] CRAN (R 4.3.2)
##  XVector                0.42.0     2023-10-24 [2] Bioconductor
##  yaml                   2.3.7      2023-01-23 [2] CRAN (R 4.3.2)
##  zlibbioc               1.48.0     2023-10-24 [2] Bioconductor
## 
##  [1] /home/cab565/R/x86_64-pc-linux-gnu-library/4.3
##  [2] /programs/R-4.3.2/library
## 
## ──────────────────────────────────────────────────────────────────────────────